########################################################
## PROGRAM NAME: 100_final_clean.R                    ##
## AUTHOR: MATT MLECZKO                               ##
## INPUTS:                                            ##
##         ccs_final.Rda                              ##
##         002_wrld_nllus_place_2006.Rda              ##
##         002_nzlu_muni_2022.Rda                     ##
##         002_wrld_nllus_msa_2006.Rda                ##
##         002_nzlu_msa_2022.Rda                      ##
##         001_msa_del.Rda                            ##
##         003_msa_0922.Rda                           ##
##         004_allmunis_2009.Rda                      ##
##         004_allmunis_2022.Rda                      ##
##         004_trt2000_basevars.Rda                   ##
##         004_trt2012_basevars.Rda                   ##
##         002_tract_to_cs.Rda                        ##
##         002_tract_to_pl.Rda                        ##
##         004_npov_tracts_2009.Rda                   ##
##         004_npov_tracts_2022.Rda                   ##
##         004_hpov_tracts_2009.Rda                   ##
##         004_hpov_tracts_2022.Rda                   ##
##         004_npov_tracts_mm_2009.Rda                ##
##         004_npov_tracts_mm_2022.Rda                ##
##         004_hpov_tracts_mm_2009.Rda                ##
##         004_hpov_tracts_mm_2022.Rda                ##
##         004_npov_muni_2009.Rda                     ##
##         004_npov_muni_2022.Rda                     ##
##         004_hpov_muni_2009.Rda                     ##
##         004_hpov_muni_2022.Rda                     ##
##         005_msas_keep.Rda                          ##
##         005_munis_cov.Rda                          ##
##         005_muni_msa_2009.Rda                      ##
##         005_muni_msa_2022.Rda                      ##
##                                                    ##
## OUTPUTS:                                           ##
##         100_af_msa.Rda                             ##
##         100_af_muni.Rda                            ##
##         100_nzlu_msa_panel.Rda                     ##
##         100_nzlu_muni_panel.Rda                    ##
##         100_zri_t1.csv                             ##
##         100_zri_t2.csv                             ##
##                                                    ## 
## PURPOSE: Combine data and create muni and          ##
##          MSA-level analytic files                  ##
##                                                    ##
## LIST OF UPDATES:                                   ##
########################################################

#log <- file(# USER DEFINED PATH AND FILE NAME HERE #)
#sink(log, append=TRUE)
#sink(log, append=TRUE, type="message")

## load libraries ##

library(tidycensus)
library(tidyverse)
library(foreign)
library(stringr)
library(tm)
library(gdata)
library(gsubfn)
library(haven)
library(readxl)
library(data.table)

## define paths ##
pr_path <- # USER DEFINED PATH HERE #

## set working directory ##
setwd(pr_path)

`%notin%` <- Negate(`%in%`)

## create a merge function that creates merge frequency as in Stata ##
## from user rwbuie at stackoverflow: https://stackoverflow.com/questions/30358401/is-there-a-way-to-create-statas-merge-indicator-variable-with-rs-merge ##
stata.merge <- function(x,y, name){
  x$df1 <- 1
  y$df2 <- 2
  df <- merge(x,y, by = name, all = TRUE)
  df$merge.variable <- rowSums(df[,c("df1", "df2")], na.rm=TRUE)
  df$df1 <- NULL
  df$df2<- NULL
  df
  #print(table(df$merge.variable))
  
  ## return the merged dataframe
  return(df)
}

## load in all data ##

load("ccs_final.Rda")
load("002_wrld_nllus_place_2006.Rda")
load("002_nzlu_muni_2022.Rda")
load("002_wrld_nllus_msa_2006.Rda")
load("002_nzlu_msa_2022.Rda")

load("001_msa_del.Rda")
load("003_msa_0922.Rda")

load("004_allmunis_2009.Rda")
load("004_allmunis_2022.Rda")
load("004_trt2000_basevars.Rda")
load("004_trt2012_basevars.Rda")

load("002_tract_to_cs.Rda")
load("002_tract_to_pl.Rda")

load("004_npov_tracts_2009.Rda")
load("004_npov_tracts_2022.Rda")
load("004_hpov_tracts_2009.Rda")
load("004_hpov_tracts_2022.Rda")

load("004_npov_tracts_mm_2009.Rda")
load("004_npov_tracts_mm_2022.Rda")
load("004_hpov_tracts_mm_2009.Rda")
load("004_hpov_tracts_mm_2022.Rda")

load("004_npov_muni_2009.Rda")
load("004_npov_muni_2022.Rda")
load("004_hpov_muni_2009.Rda")
load("004_hpov_muni_2022.Rda")

load("005_msas_keep.Rda")
load("005_munis_cov.Rda")

load("005_muni_msa_2009.Rda")
load("005_muni_msa_2022.Rda")

## count number of impoverished tracts per msa ##

npov.trt.msa.2009 <- npov.tracts.2009.fin %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(msa.del.2020.rd, "FIPS") %>%
  group_by(cbsa10) %>%
  summarize(npov_tracts_2009 = n(), .groups = "drop") 

npov.trt.msa.2022 <- npov.tracts.2022.fin %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(msa.del.2020.rd, "FIPS") %>%
  group_by(cbsa10) %>%
  summarize(npov_tracts_2022 = n(), .groups = "drop")

npov.trt.msa <- npov.trt.msa.2009 %>%
  full_join(npov.trt.msa.2022, "cbsa10") %>%
  rename(CBSA = cbsa10)

hpov.trt.msa.2009 <- hpov.tracts.2009.fin %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(msa.del.2020.rd, "FIPS") %>%
  group_by(cbsa10) %>%
  summarize(hpov_tracts_2009 = n(), .groups = "drop")

hpov.trt.msa.2022 <- hpov.tracts.2022.fin %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(msa.del.2020.rd, "FIPS") %>%
  group_by(cbsa10) %>%
  summarize(hpov_tracts_2022 = n(), .groups = "drop")

hpov.trt.msa <- hpov.trt.msa.2009 %>%
  full_join(hpov.trt.msa.2022, "cbsa10") %>%
  rename(CBSA = cbsa10)

hpov.cc.trt.msa.2009 <- hpov.tracts.2009.fin %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(msa.del.2020.rd, "FIPS") %>%
  left_join(select(tract.to.pl.metro, GEOID, placefp), "GEOID") %>%
  mutate(GEOID_muni = paste0(`FIPS State Code`,placefp)) %>%
  filter(GEOID_muni %in% ccs.final$GEOID) %>%
  group_by(cbsa10) %>%
  summarize(hpov_cc_tracts_2009 = n(), .groups= "drop")

hpov.cc.trt.msa.2022 <- hpov.tracts.2022.fin %>%
  mutate(FIPS = substr(GEOID,1,5)) %>%
  left_join(msa.del.2020.rd, "FIPS") %>%
  left_join(select(tract.to.pl.metro, GEOID, placefp), "GEOID") %>%
  mutate(GEOID_muni = paste0(`FIPS State Code`,placefp)) %>%
  filter(GEOID_muni %in% ccs.final$GEOID) %>%
  group_by(cbsa10) %>%
  summarize(hpov_cc_tracts_2022 = n(), .groups = "drop")

hpov.cc.trt.msa <- hpov.cc.trt.msa.2009 %>%
  full_join(hpov.cc.trt.msa.2022, "cbsa10") %>%
  rename(CBSA = cbsa10)

pov.trt.msa <- npov.trt.msa %>%
  full_join(hpov.trt.msa, "CBSA") %>%
  full_join(hpov.cc.trt.msa, "CBSA")

sum(pov.trt.msa$npov_tracts_2009 < pov.trt.msa$hpov_tracts_2009, na.rm=T)
sum(pov.trt.msa$npov_tracts_2022 < pov.trt.msa$hpov_tracts_2022, na.rm=T)

sum(pov.trt.msa$hpov_tracts_2009 < pov.trt.msa$hpov_cc_tracts_2009, na.rm=T)
sum(pov.trt.msa$hpov_tracts_2022 < pov.trt.msa$hpov_cc_tracts_2022, na.rm=T)

## munis ##

length(unique(munis.final.2009$GEOID)) == nrow(munis.final.2009)
range(nchar(munis.final.2009$GEOID))

length(unique(munis.final.2022$GEOID)) == nrow(munis.final.2022)
range(nchar(munis.final.2022$GEOID))

all.munis.final.m <- stata.merge(munis.final.2009,
                                 munis.final.2022,
                                 "GEOID")

## check merge ##
table(all.munis.final.m$merge.variable, useNA = "ifany")

## keep matches ##

all.munis.temp <- all.munis.final.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## tests ## 
af.test1 <- all.munis.temp %>%
  filter(GEOID_full.x == GEOID_full.y)

nrow(af.test1)/nrow(all.munis.temp)

af.test2 <- all.munis.temp %>%
  filter(GEOID_full.x != GEOID_full.y) %>%
  mutate(name_test = ifelse(NAME.x == NAME.y, 1, 0),
         GEOID_rd = paste0(substr(GEOID_full.y,1,2),
                           substr(GEOID_full.y,6,10)),
         GEOID_full_test = ifelse(GEOID_full.x == GEOID_rd, 1, 0)) %>%
  select(GEOID,
         GEOID_full.x,
         GEOID_full.y,
         GEOID_rd,
         NAME.x,
         NAME.y,
         GEOID_full_test)

sum(af.test2$GEOID_full_test)/nrow(af.test2)

all.munis.final <- all.munis.temp %>%
  select(-GEOID_full.x,
         -NAME.x) %>%
  rename(GEOID_full = GEOID_full.y,
         NAME = NAME.y) %>%
  select(GEOID,
         GEOID_full,
         NAME,
         everything())

all.munis.final.ids <- all.munis.final %>%
  select(GEOID)

##################################
## (1) create panel zoning data ##
##################################

## muni-level first ##

wrld.nllus.2006.fm <- wrld.nllus.2006.final %>%
  select(-inclusionary_wrld) %>%
  rename_at(vars(-GEOID),function(x) paste0(x,"_2006"))

nzlu.2022.fm <- nzlu.2022.final.zri %>%
  rename_at(vars(-GEOID),function(x) paste0(x,"_2022"))

nzlu.muni.panel.m <- stata.merge(wrld.nllus.2006.fm,
                                 nzlu.2022.fm,
                                 "GEOID")

## check merge ## 
table(nzlu.muni.panel.m$merge.variable, useNA = "ifany")

## keep merge.variable == 2,3 ##

nzlu.muni.panel <- nzlu.muni.panel.m %>%
  filter(merge.variable %in% c(2,3)) %>%
  select(-merge.variable)

## fix GEOID errors ## 
nzlu.muni.panel$GEOID[nzlu.muni.panel$GEOID == "2527100"] <- "2527060"
nzlu.muni.panel$GEOID[nzlu.muni.panel$GEOID == "1571550"] <- "1517000"
nzlu.muni.panel$GEOID[nzlu.muni.panel$GEOID == "2365725"] <- "2365760"
nzlu.muni.panel$GEOID[nzlu.muni.panel$GEOID == "2578972"] <- "2578865"
nzlu.muni.panel$GEOID[nzlu.muni.panel$GEOID == "3983090"] <- "3983111"

save(nzlu.muni.panel,
     file = "100_nzlu_muni_panel.Rda")

## now, msa-level ##

wrld.nllus.msa.2006.fm <- wrld.nllus.msa.2006.temp %>%
  rename_at(vars(-cbsa10),function(x) paste0(x,"_2006"))

nzlu.2022.fm <- nzlu.msa.2022.final %>%
  rename_at(vars(-cbsa10),function(x) paste0(x,"_2022"))

nzlu.msa.panel.m <- stata.merge(wrld.nllus.msa.2006.fm,
                                nzlu.2022.fm,
                                "cbsa10")

## check merge ## 
table(nzlu.msa.panel.m$merge.variable, useNA = "ifany")

## keep merge.variable == 2,3 ##

nzlu.msa.panel <- nzlu.msa.panel.m %>%
  filter(merge.variable %in% c(2,3)) %>%
  select(-merge.variable)

save(nzlu.msa.panel,
    file = "100_nzlu_msa_panel.Rda")

##########################
## (2) process ACS data ##
##########################

sample.msas.0922.m <- stata.merge(msa.0922.fin,
                                      msas.keep,
                                      "CBSA")

## check merge ##
table(sample.msas.0922.m$merge.variable, useNA = "ifany")

## keep matches, final cleaning ##
sample.msas.0922 <- sample.msas.0922.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(log_asian_2009 = case_when(pop_nh_asian_2009 != 0 ~ log(1/per_asian_2009),
                                    pop_nh_asian_2009 == 0 ~ 0),
         log_black_2009 = case_when(pop_nh_black_2009 != 0 ~ log(1/per_black_2009),
                                    pop_nh_black_2009 == 0 ~ 0),
         log_latino_2009 = case_when(pop_h_2009 != 0 ~ log(1/per_latino_2009),
                                     pop_h_2009 == 0 ~ 0),
         log_white_2009 = case_when(pop_nh_white_2009 != 0 ~ log(1/per_white_2009),
                                    pop_nh_white_2009 == 0 ~ 0),
         log_aian_2009 = case_when(pop_nh_aian_2009 != 0 ~ log(1/per_aian_2009),
                                   pop_nh_aian_2009 == 0 ~ 0),
         log_other_2009 = case_when(rowSums(cbind(pop_nh_other_2009, pop_nh_nhpi_2009, pop_nh_multi_2009),na.rm=T) != 0 ~ log(1/per_other_2009),
                                    rowSums(cbind(pop_nh_other_2009, pop_nh_nhpi_2009, pop_nh_multi_2009),na.rm=T) == 0 ~ 0),
         entropy_er_2009 = per_asian_2009*log_asian_2009 + 
                           per_black_2009*log_black_2009 + 
                           per_latino_2009*log_latino_2009 + 
                           per_white_2009*log_white_2009 +
                           per_aian_2009*log_aian_2009 +
                           per_other_2009*log_other_2009,
         log_asian_2022 = case_when(pop_nh_asian_2022 != 0 ~ log(1/per_asian_2022),
                                    pop_nh_asian_2022 == 0 ~ 0),
         log_black_2022 = case_when(pop_nh_black_2022 != 0 ~ log(1/per_black_2022),
                                    pop_nh_black_2022 == 0 ~ 0),
         log_latino_2022 = case_when(pop_h_2022 != 0 ~ log(1/per_latino_2022),
                                     pop_h_2022 == 0 ~ 0),
         log_white_2022 = case_when(pop_nh_white_2022 != 0 ~ log(1/per_white_2022),
                                    pop_nh_white_2022 == 0 ~ 0),
         log_aian_2022 = case_when(pop_nh_aian_2022 != 0 ~ log(1/per_aian_2022),
                                   pop_nh_aian_2022 == 0 ~ 0),
         log_other_2022 = case_when(rowSums(cbind(pop_nh_other_2022, pop_nh_nhpi_2022, pop_nh_multi_2022),na.rm=T) != 0 ~ log(1/per_other_2022),
                                    rowSums(cbind(pop_nh_other_2022, pop_nh_nhpi_2022, pop_nh_multi_2022),na.rm=T) == 0 ~ 0),
         entropy_er_2022 = per_asian_2022*log_asian_2022 + 
           per_black_2022*log_black_2022 + 
           per_latino_2022*log_latino_2022 + 
           per_white_2022*log_white_2022 +
           per_aian_2022*log_aian_2022 +
           per_other_2022*log_other_2022) %>%
  select(CBSA,
         ends_with("_2009"),
         ends_with("_2022"))

#############################
## outcome data processing ##
#############################

## outcome vars in above average poverty tracts ##

npov.muni.2009.fm <- npov.muni.2009 %>%
  rename(rb_2009 = rb,
         mgr_2009 = mgr,
         mpv_2009 = mpv)

npov.muni.2022.fm <- npov.muni.2022 %>%
  rename(rb_2022 = rb,
         mgr_2022 = mgr,
         mpv_2022 = mpv)

npov.muni.fin.m <- stata.merge(npov.muni.2009.fm,
                               npov.muni.2022.fm,
                               c("GEOID_full"))

## diagnose merge ##
table(npov.muni.fin.m$merge.variable, useNA ="ifany")

## keep matches ##
npov.muni.fin <- npov.muni.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## outcome vars in high poverty tracts ##

hpov.muni.2009.fm <- hpov.muni.2009 %>%
  rename(rb_2009 = rb,
         mgr_2009 = mgr,
         mpv_2009 = mpv)

hpov.muni.2022.fm <- hpov.muni.2022 %>%
  rename(rb_2022 = rb,
         mgr_2022 = mgr,
         mpv_2022 = mpv)

hpov.muni.fin.m <- stata.merge(hpov.muni.2009.fm,
                               hpov.muni.2022.fm,
                               "GEOID_full")

## diagnose merge ##
table(hpov.muni.fin.m$merge.variable, useNA ="ifany")

## keep all obs ##
hpov.muni.fin <- hpov.muni.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## finally, do the same for central city, high poverty tracts ##

hpov.cc.muni.fin <- hpov.muni.fin %>%
  filter(GEOID_full %in% ccs.final$GEOID)

##
## MSA level ##
##

## above-average poverty - 2009 ##

npov.tracts.2009.fin$FIPS <- substr(npov.tracts.2009.fin$GEOID,1,5)

npov.tracts.2009.wcbsa.m <- stata.merge(msa.del.2020.rd,
                                        npov.tracts.2009.fin,
                                        "FIPS")

table(npov.tracts.2009.wcbsa.m$merge.variable, useNA = "ifany")

npov.tracts.2009.wcbsa <- npov.tracts.2009.wcbsa.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable,
         -`Metropolitan/Micropolitan Statistical Area`,
         -`Metropolitan Division Code`,
         -`CSA Code`,
         -`CSA Title`,
         -`Metropolitan Division Title`,
         -`County/County Equivalent`,
         -`State Name`,
         -`FIPS State Code`,
         -`FIPS County Code`,
         -`Central/Outlying County`) %>%
  rename(CBSA = cbsa10)

npov.msa.2009.p1 <- npov.tracts.2009.wcbsa %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop > 0, ro_hhlds/hpov_rnt_pop, 0),
         rb_wt = rb*wt) %>%
  summarize(rb_npov_2009 = sum(rb_wt,na.rm=T),
            wt_sum = sum(wt), .groups = "drop")

summary(npov.msa.2009.p1)

npov.msa.2009.p2 <- npov.tracts.2009.wcbsa %>%
  left_join(all.tracts.2000.pr, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop != 0, ro_hhlds/hpov_rnt_pop, 0)) %>%
  summarize_at(c("mgr_ia","mpv_ia","mgr_2000","mpv_2000"),
               funs(weighted.mean(., wt, na.rm=T))) %>% 
  ungroup() %>%
  rename(mgr_npov_2000 = mgr_2000,
         mpv_npov_2000 = mpv_2000,
         mgr_npov_2009 = mgr_ia,
         mpv_npov_2009 = mpv_ia)

summary(npov.msa.2009.p2)

## combine ##
npov.msa.2009.fin <- npov.msa.2009.p1 %>%
  full_join(npov.msa.2009.p2, "CBSA")

nrow(npov.msa.2009.p1)
nrow(npov.msa.2009.p2)
nrow(npov.msa.2009.fin)
summary(npov.msa.2009.fin)

## above-average poverty - 2022 ##

npov.tracts.2022.fin$FIPS <- substr(npov.tracts.2022.fin$GEOID,1,5)

npov.tracts.2022.wcbsa.m <- stata.merge(msa.del.2020.rd,
                                        npov.tracts.2022.fin,
                                        "FIPS")

table(npov.tracts.2022.wcbsa.m$merge.variable, useNA = "ifany")

npov.tracts.2022.wcbsa <- npov.tracts.2022.wcbsa.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable,
         -`Metropolitan/Micropolitan Statistical Area`,
         -`Metropolitan Division Code`,
         -`CSA Code`,
         -`CSA Title`,
         -`Metropolitan Division Title`,
         -`County/County Equivalent`,
         -`State Name`,
         -`FIPS State Code`,
         -`FIPS County Code`,
         -`Central/Outlying County`) %>%
  rename(CBSA = cbsa10)

npov.msa.2022.p1 <- npov.tracts.2022.wcbsa %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop > 0, ro_hhlds/hpov_rnt_pop, 0),
         rb_wt = rb*wt) %>%
  summarize(rb_npov_2022 = sum(rb_wt,na.rm=T),
            wt_sum = sum(wt), .groups = "drop")

summary(npov.msa.2022.p1)

npov.msa.2022.p2 <- npov.tracts.2022.wcbsa %>%
  left_join(all.tracts.2012, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop != 0, ro_hhlds/hpov_rnt_pop, 0)) %>%
  summarize_at(c("median_gross_rent","median_prop_value","mgr_2012","mpv_2012"),
               funs(weighted.mean(., wt, na.rm=T))) %>%
  ungroup() %>%
  rename(mgr_npov_2012 = mgr_2012,
         mpv_npov_2012 = mpv_2012,
         mgr_npov_2022 = median_gross_rent,
         mpv_npov_2022 = median_prop_value)

## combine ##
npov.msa.2022.fin <- npov.msa.2022.p1 %>%
  full_join(npov.msa.2022.p2, "CBSA")

nrow(npov.msa.2022.p1)
nrow(npov.msa.2022.p2)
nrow(npov.msa.2022.fin)
summary(npov.msa.2022.fin)

## high-poverty - 2009 ##

hpov.tracts.2009.fin$FIPS <- substr(hpov.tracts.2009.fin$GEOID,1,5)

hpov.tracts.2009.wcbsa.m <- stata.merge(msa.del.2020.rd,
                                        hpov.tracts.2009.fin,
                                        "FIPS")

table(hpov.tracts.2009.wcbsa.m$merge.variable, useNA = "ifany")

hpov.tracts.2009.wcbsa <- hpov.tracts.2009.wcbsa.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable,
         -`Metropolitan/Micropolitan Statistical Area`,
         -`Metropolitan Division Code`,
         -`CSA Code`,
         -`CSA Title`,
         -`Metropolitan Division Title`,
         -`County/County Equivalent`,
         -`State Name`,
         -`FIPS State Code`,
         -`FIPS County Code`,
         -`Central/Outlying County`) %>%
  rename(CBSA = cbsa10)

hpov.msa.2009.p1 <- hpov.tracts.2009.wcbsa %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop > 0, ro_hhlds/hpov_rnt_pop, 0),
         rb_wt = rb*wt) %>%
  summarize(rb_hpov_2009 = sum(rb_wt,na.rm=T),
            wt_sum = sum(wt), .groups = "drop")

summary(hpov.msa.2009.p1)

hpov.msa.2009.p2 <- hpov.tracts.2009.wcbsa %>%
  left_join(all.tracts.2000.pr, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop != 0, ro_hhlds/hpov_rnt_pop, 0)) %>%
  summarize_at(c("mgr_ia","mpv_ia","mgr_2000","mpv_2000"),
               funs(weighted.mean(., wt, na.rm=T))) %>%
  ungroup() %>%
  rename(mgr_hpov_2000 = mgr_2000,
         mpv_hpov_2000 = mpv_2000,
         mgr_hpov_2009 = mgr_ia,
         mpv_hpov_2009 = mpv_ia)

## combine ##
hpov.msa.2009.fin <- hpov.msa.2009.p1 %>%
  full_join(hpov.msa.2009.p2, "CBSA")

nrow(hpov.msa.2009.p1)
nrow(hpov.msa.2009.p2)
nrow(hpov.msa.2009.fin)
summary(hpov.msa.2009.fin)

## high-poverty - 2022 ##

hpov.tracts.2022.fin$FIPS <- substr(hpov.tracts.2022.fin$GEOID,1,5)

hpov.tracts.2022.wcbsa.m <- stata.merge(msa.del.2020.rd,
                                        hpov.tracts.2022.fin,
                                        "FIPS")

table(hpov.tracts.2022.wcbsa.m$merge.variable, useNA = "ifany")

hpov.tracts.2022.wcbsa <- hpov.tracts.2022.wcbsa.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable,
         -`Metropolitan/Micropolitan Statistical Area`,
         -`Metropolitan Division Code`,
         -`CSA Code`,
         -`CSA Title`,
         -`Metropolitan Division Title`,
         -`County/County Equivalent`,
         -`State Name`,
         -`FIPS State Code`,
         -`FIPS County Code`,
         -`Central/Outlying County`) %>%
  rename(CBSA = cbsa10)

hpov.msa.2022.p1 <- hpov.tracts.2022.wcbsa %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop > 0, ro_hhlds/hpov_rnt_pop, 0),
         rb_wt = rb*wt) %>%
  summarize(rb_hpov_2022 = sum(rb_wt,na.rm=T),
            wt_sum = sum(wt), .groups = "drop")

hpov.msa.2022.p2 <- hpov.tracts.2022.wcbsa %>%
  left_join(all.tracts.2012, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop != 0, ro_hhlds/hpov_rnt_pop, 0)) %>%
  summarize_at(c("median_gross_rent","median_prop_value","mgr_2012","mpv_2012"),
               funs(weighted.mean(., wt, na.rm=T))) %>%
  ungroup() %>%
  rename(mgr_hpov_2012 = mgr_2012,
         mpv_hpov_2012 = mpv_2012,
         mgr_hpov_2022 = median_gross_rent,
         mpv_hpov_2022 = median_prop_value)

hpov.msa.2022.fin <- hpov.msa.2022.p1 %>%
  full_join(hpov.msa.2022.p2, "CBSA")

## combine ##
nrow(hpov.msa.2022.p1)
nrow(hpov.msa.2022.p2)
nrow(hpov.msa.2022.fin)
summary(hpov.msa.2022.fin)

## high poverty central city - 2009 ##

hpov.tracts.2009.fin.cc <- stata.merge(hpov.tracts.2009.fin,
                                       tract.to.pl.metro,
                                       "GEOID")

## check merge ##
table(hpov.tracts.2009.fin.cc$merge.variable, useNA = "ifany")

hpov.cc.msa.2009.p1 <- hpov.tracts.2009.fin.cc %>%
  filter(merge.variable == 3 | GEOID == "39049008230") %>%
  mutate(GEOID_muni_full = paste0(substr(FIPS.x,1,2),placefp)) %>%
  select(GEOID_muni_full,
         GEOID) %>%
  filter(GEOID_muni_full %in% ccs.final$GEOID) %>%
  inner_join(hpov.tracts.2009.wcbsa, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop > 0, ro_hhlds/hpov_rnt_pop, 0),
         rb_wt = rb*wt) %>%
  summarize(GEOID_muni_full = first(GEOID_muni_full),
            rb_hpov_cc_2009 = sum(rb_wt,na.rm=T),
            wt_sum = sum(wt), .groups = "drop")

summary(hpov.cc.msa.2009.p1)

hpov.cc.msa.2009.p2 <- hpov.tracts.2009.fin.cc %>%
  filter(merge.variable == 3 | GEOID == "39049008230") %>%
  mutate(GEOID_muni_full = paste0(substr(FIPS.x,1,2),placefp)) %>%
  select(GEOID_muni_full,
         GEOID) %>%
  filter(GEOID_muni_full %in% ccs.final$GEOID) %>%
  inner_join(hpov.tracts.2009.wcbsa, "GEOID") %>%
  left_join(all.tracts.2000.pr, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop != 0, ro_hhlds/hpov_rnt_pop, 0)) %>%
  summarize_at(c("mgr_ia","mpv_ia","mgr_2000","mpv_2000"),
               funs(weighted.mean(., wt, na.rm=T))) %>%
  ungroup() %>%
  rename(mgr_hpov_cc_2000 = mgr_2000,
         mpv_hpov_cc_2000 = mpv_2000,
         mgr_hpov_cc_2009 = mgr_ia,
         mpv_hpov_cc_2009 = mpv_ia)

## combine ##
hpov.cc.msa.2009.fin <- hpov.cc.msa.2009.p1 %>%
  full_join(hpov.cc.msa.2009.p2, "CBSA")

nrow(hpov.cc.msa.2009.p1)
nrow(hpov.cc.msa.2009.p2)
nrow(hpov.cc.msa.2009.fin)
summary(hpov.cc.msa.2009.fin)

## high poverty central city - 2022 ##

hpov.tracts.2022.fin.cc <- stata.merge(hpov.tracts.2022.fin,
                                       tract.to.pl.metro,
                                       "GEOID")

## check merge ##
table(hpov.tracts.2022.fin.cc$merge.variable, useNA = "ifany")

hpov.cc.msa.2022.p1 <- hpov.tracts.2022.fin.cc %>%
  filter(merge.variable == 3 | GEOID == "39049008230") %>%
  mutate(GEOID_muni_full = paste0(substr(FIPS.x,1,2),placefp)) %>%
  select(GEOID_muni_full,
         GEOID) %>%
  filter(GEOID_muni_full %in% ccs.final$GEOID) %>%
  inner_join(hpov.tracts.2022.wcbsa, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop > 0, ro_hhlds/hpov_rnt_pop, 0),
         rb_wt = rb*wt) %>%
  summarize(GEOID_muni_full = first(GEOID_muni_full),
            rb_hpov_cc_2022 = sum(rb_wt,na.rm=T),
            wt_sum = sum(wt), .groups = "drop")

summary(hpov.cc.msa.2022.p1)

hpov.cc.msa.2022.p2 <- hpov.tracts.2022.fin.cc %>%
  filter(merge.variable == 3 | GEOID == "39049008230") %>%
  mutate(GEOID_muni_full = paste0(substr(FIPS.x,1,2),placefp)) %>%
  select(GEOID_muni_full,
         GEOID) %>%
  filter(GEOID_muni_full %in% ccs.final$GEOID) %>%
  inner_join(hpov.tracts.2022.wcbsa, "GEOID") %>%
  left_join(all.tracts.2012, "GEOID") %>%
  group_by(CBSA) %>%
  mutate(hpov_rnt_pop = sum(ro_hhlds,na.rm=T),
         wt =  ifelse(hpov_rnt_pop != 0, ro_hhlds/hpov_rnt_pop, 0)) %>%
  summarize_at(c("median_gross_rent","median_prop_value","mgr_2012","mpv_2012"),
               funs(weighted.mean(., wt, na.rm=T))) %>%
  ungroup() %>%
  rename(mgr_hpov_cc_2012 = mgr_2012,
         mpv_hpov_cc_2012 = mpv_2012,
         mgr_hpov_cc_2022 = median_gross_rent,
         mpv_hpov_cc_2022 = median_prop_value)

hpov.cc.msa.2022.fin <- hpov.cc.msa.2022.p1 %>%
  full_join(hpov.cc.msa.2022.p2, "CBSA")

## combine ##
nrow(hpov.cc.msa.2022.p1)
nrow(hpov.cc.msa.2022.p2)
nrow(hpov.cc.msa.2022.fin)
summary(hpov.cc.msa.2022.fin)

## check varnames ##

names(npov.msa.2009.fin)
names(npov.msa.2022.fin)

names(hpov.msa.2009.fin)
names(hpov.msa.2022.fin)

names(hpov.cc.msa.2009.fin)
names(hpov.cc.msa.2022.fin)

## merges ##

## impoverished tracts ##

npov.msa.fin.m <- stata.merge(npov.msa.2009.fin,
                              npov.msa.2022.fin,
                              "CBSA")

## diagnose merge ##
table(npov.msa.fin.m$merge.variable, useNA ="ifany")


hpov.msa.fin.m <- stata.merge(hpov.msa.2009.fin,
                              hpov.msa.2022.fin,
                              "CBSA")

## diagnose merge ##
table(hpov.msa.fin.m$merge.variable, useNA ="ifany")

hpov.cc.msa.fin.m <- stata.merge(hpov.cc.msa.2009.fin,
                                 hpov.cc.msa.2022.fin,
                                 "CBSA")

## diagnose merge ##
table(hpov.cc.msa.fin.m$merge.variable, useNA ="ifany")

## keep matches ##
npov.msa.fin <- npov.msa.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  select(CBSA,
         mgr_npov_2000,
         mpv_npov_2000,
         mgr_npov_2012,
         mpv_npov_2012,
         rb_npov_2009,
         mgr_npov_2009,
         mpv_npov_2009,
         rb_npov_2022,
         mgr_npov_2022,
         mpv_npov_2022)

summary(npov.msa.fin)

hpov.msa.fin <- hpov.msa.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  select(CBSA,
         mgr_hpov_2000,
         mpv_hpov_2000,
         mgr_hpov_2012,
         mpv_hpov_2012,
         rb_hpov_2009,
         mgr_hpov_2009,
         mpv_hpov_2009,
         rb_hpov_2022,
         mgr_hpov_2022,
         mpv_hpov_2022)

summary(hpov.msa.fin)

hpov.cc.msa.fin <- hpov.cc.msa.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  select(CBSA,
         mgr_hpov_cc_2000,
         mpv_hpov_cc_2000,
         mgr_hpov_cc_2012,
         mpv_hpov_cc_2012,
         rb_hpov_cc_2009,
         mgr_hpov_cc_2009,
         mpv_hpov_cc_2009,
         rb_hpov_cc_2022,
         mgr_hpov_cc_2022,
         mpv_hpov_cc_2022)

summary(hpov.cc.msa.fin)


##########################
## merge to zoning data ##
##########################

## merge munis to zoning data ##

af.muni.m <- stata.merge(all.munis.final,
                         nzlu.muni.panel,
                         "GEOID")

## check merge ##
table(af.muni.m$merge.variable, useNA = "ifany")

## keep matches ##
af.muni.init <- af.muni.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  rename(rb_2009_muniwide = rb_2009,
         rb_2022_muniwide = rb_2022)

###############################
## merge on rent burden data ##
###############################

length(unique(af.muni.init$GEOID)) == nrow(af.muni.init)
range(nchar(trim(af.muni.init$GEOID)))
range(nchar(trim(af.muni.init$GEOID_full)))

length(unique(npov.muni.fin$GEOID_full)) == nrow(npov.muni.fin)
range(nchar(trim(npov.muni.fin$GEOID_full)))
 
## above average ##
af.muni.m1 <- stata.merge(af.muni.init,
                          npov.muni.fin,
                          "GEOID_full")

## diagnose merge ##
table(af.muni.m1$merge.variable, useNA = "ifany")

## keep 1,3 values ##
af.muni.k1 <- af.muni.m1 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable) %>%
  rename(rb_npov_2009 = rb_2009,
         rb_npov_2022 = rb_2022,
         mgr_npov_2000 = mgr_2000,
         mgr_npov_2009 = mgr_2009,
         mgr_npov_2012 = mgr_2012,
         mgr_npov_2022 = mgr_2022,
         mpv_npov_2000 = mpv_2000,
         mpv_npov_2009 = mpv_2009,
         mpv_npov_2012 = mpv_2012,
         mpv_npov_2022 = mpv_2022)

## high poverty ##
af.muni.m2 <- stata.merge(af.muni.k1,
                          hpov.muni.fin,
                          "GEOID_full")

## diagnose merge ##
table(af.muni.m2$merge.variable, useNA = "ifany")

## keep 1,3 values ##
af.muni.k2 <- af.muni.m2 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable) %>%
  rename(rb_hpov_2009 = rb_2009,
         rb_hpov_2022 = rb_2022,
         mgr_hpov_2000 = mgr_2000,
         mgr_hpov_2009 = mgr_2009,
         mgr_hpov_2012 = mgr_2012,
         mgr_hpov_2022 = mgr_2022,
         mpv_hpov_2000 = mpv_2000,
         mpv_hpov_2009 = mpv_2009,
         mpv_hpov_2012 = mpv_2012,
         mpv_hpov_2022 = mpv_2022)

## high poverty central city ##

af.muni.m3 <- stata.merge(af.muni.k2,
                          hpov.cc.muni.fin,
                          "GEOID_full")

## diagnose merge ##
table(af.muni.m3$merge.variable, useNA = "ifany")

af.muni <- af.muni.m3 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable) %>%
  rename(rb_hpov_cc_2009 = rb_2009,
         rb_hpov_cc_2022 = rb_2022,
         mgr_hpov_cc_2000 = mgr_2000,
         mgr_hpov_cc_2009 = mgr_2009,
         mgr_hpov_cc_2012 = mgr_2012,
         mgr_hpov_cc_2022 = mgr_2022,
         mpv_hpov_cc_2000 = mpv_2000,
         mpv_hpov_cc_2009 = mpv_2009,
         mpv_hpov_cc_2012 = mpv_2012,
         mpv_hpov_cc_2022 = mpv_2022) %>%
  mutate( ## relative measures - mgr ##
    mgr_npov_rel_2009 = ifelse(mgr_npov_2000 > 0, mgr_npov_2009/mgr_npov_2000, NA),
    mgr_npov_rel_2022 = ifelse(mgr_npov_2012 > 0, mgr_npov_2022/mgr_npov_2012, NA),
    mgr_hpov_rel_2009 = ifelse(mgr_hpov_2000 > 0, mgr_hpov_2009/mgr_hpov_2000, NA),
    mgr_hpov_rel_2022 = ifelse(mgr_hpov_2012 > 0, mgr_hpov_2022/mgr_hpov_2012, NA),
    mgr_hpov_cc_rel_2009 = ifelse(mgr_hpov_cc_2000 > 0, mgr_hpov_cc_2009/mgr_hpov_cc_2000, NA),
    mgr_hpov_cc_rel_2022 = ifelse(mgr_hpov_cc_2012 > 0, mgr_hpov_cc_2022/mgr_hpov_cc_2012, NA),
    ## relative measures - mpv ##
    mpv_npov_rel_2009 = ifelse(mpv_npov_2000 > 0, mpv_npov_2009/mpv_npov_2000,NA),
    mpv_npov_rel_2022 = ifelse(mpv_npov_2012 > 0, mpv_npov_2022/mpv_npov_2012,NA),
    mpv_hpov_rel_2009 = ifelse(mpv_hpov_2000 > 0, mpv_hpov_2009/mpv_hpov_2000, NA),
    mpv_hpov_rel_2022 = ifelse(mpv_hpov_2012 > 0, mpv_hpov_2022/mpv_hpov_2012,NA),
    mpv_hpov_cc_rel_2009 = ifelse(mpv_hpov_cc_2000 > 0, mpv_hpov_cc_2009/mpv_hpov_cc_2000,NA),
    mpv_hpov_cc_rel_2022 = ifelse(mpv_hpov_cc_2012 > 0, mpv_hpov_cc_2022/mpv_hpov_cc_2012, NA))

## fix missing values ## 

af.muni.miss <- af.muni %>%
  filter(is.na(median_gross_rent_2009) | 
         is.na(median_yr_str_2009) |
         is.na(pop_density_2009) | 
         is.na(median_gross_rent_2022) | 
         is.na(median_prop_value_2022) | 
         is.na(median_yr_str_2022) | 
         is.na(median_hhld_inc_2022)) %>%
  select(GEOID,
         NAME,
         median_gross_rent_2009,
         median_yr_str_2009,
         pop_density_2009,
         rb_2009_muniwide,
         median_gross_rent_2022,
         median_prop_value_2022,
         median_hhld_inc_2022,
         median_yr_str_2022)

## solution -> impute values from 2006-2010 ACS estimates ##

## median gross rent (2009) ## 
af.muni$median_gross_rent_2009[af.muni$GEOID == "3901672"] <- 1396 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "3976582"] <- 936 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "3643005"] <- 490 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "0813845"] <- 1528 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "3925802"] <- 1021 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "2713168"] <- 1721 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "1719083"] <- 2001 * 437.6/321.5
af.muni$median_gross_rent_2009[af.muni$GEOID == "1233600"] <- (2001+534+830+765+786)/5 * 437.6/316.7 ## average of surrounding 5 2005-2009 ACS estimates ##
af.muni$median_gross_rent_2009[af.muni$GEOID == "2751316"] <- (873+1066+771+649)/4 * 437.6/316.7 ## average of surrounding 4 2005-2009 ACS estimates ##
af.muni$median_gross_rent_2009[af.muni$GEOID == "5511775"] <- (923+675+744+474+920+475)/6 * 437.6/321.5 ## average of surrounding 6 2006-2010 ACS estimates ##

## pop density (2009) ## 
af.muni$pop_density_2009[af.muni$GEOID == "2148006"] <- 1804.8
af.muni$pop_density_2009[af.muni$GEOID == "2756724"] <- 108.6
af.muni$pop_density_2009[af.muni$GEOID == "1319000"] <- 867.1
af.muni$pop_density_2009[af.muni$GEOID == "4250528"] <- 1434.1
af.muni$pop_density_2009[af.muni$GEOID == "1228452"] <- 8719.2
af.muni$pop_density_2009[af.muni$GEOID == "2718662"] <- 1079.7
af.muni$pop_density_2009[af.muni$GEOID == "2503690"] <- 765.4
af.muni$pop_density_2009[af.muni$GEOID == "4847337"] <- 4993.8
af.muni$pop_density_2009[af.muni$GEOID == "1243083"] <- 1366
af.muni$pop_density_2009[af.muni$GEOID == "5577975"] <- 815.7
af.muni$pop_density_2009[af.muni$GEOID == "5554875"] <- 354.4
af.muni$pop_density_2009[af.muni$GEOID == "5540550"] <- 161.5
af.muni$pop_density_2009[af.muni$GEOID == "5535150"] <- 306.8
af.muni$pop_density_2009[af.muni$GEOID == "3981718"] <- 1612.1
af.muni$pop_density_2009[af.muni$GEOID == "2577890"] <- 1718.7
af.muni$pop_density_2009[af.muni$GEOID == "0513450"] <- 247.8

## median year of structure (2009) ##
af.muni$median_yr_str_2009[af.muni$GEOID == "2700730"] <- 1999

## median gross rent (2022) ## 
af.muni$median_gross_rent_2022[af.muni$GEOID == "0940290"] <- 898*(437.6/383.2)
af.muni$median_gross_rent_2022[af.muni$GEOID == "1715235"] <- 935*(437.6/383.2)
af.muni$median_gross_rent_2022[af.muni$GEOID == "1737608"] <- 1828*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "2201885"] <- 836*(437.6/383.2)
af.muni$median_gross_rent_2022[af.muni$GEOID == "2379550"] <- 282*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "2627960"] <- 1259*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "2745754"] <- 1802*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "2734244"] <- 1189*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "3454570"] <- 2639*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "3645997"] <- 2510*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "4727020"] <- 3501*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "4871960"] <- 2555*(437.6/410.8)
af.muni$median_gross_rent_2022[af.muni$GEOID == "5555075"] <- 953*(437.6/410.8)

## median property value (2022) ##
af.muni$median_prop_value_2022[af.muni$GEOID == "4813732"] <- 62100*(437.6/410.8)

## median hhld inc (2022) ##
af.muni$median_hhld_inc_2022[af.muni$GEOID == "4825488"] <- 41774*(437.6/383.2)

## median yr str (2022) ##
af.muni$median_yr_str_2022[af.muni$GEOID == "1735866"] <- 1955

## check missings ##
## the 1 missing rb_2009_muniwide value is valid ##
## the 6 missing median_gross_rent_2022 values are valid ##

sapply(af.muni, function(x) sum(is.na(x)))

## reformat Greenfield, MA GEOID so that spatial merge works ##

af.muni$GEOID[af.muni$GEOID == "2527060"] <- "2527100"
af.muni$GEOID_full[af.muni$GEOID_full == "2501127060"] <- "2501127100"

## limit muni analytic file to metro munis ##

## start by making the muni to msa file unique by muni ID ##
## when aggregating to metro level, munis will be duplicated (munis in multiple metros) 
## but here, just need munis in at least one metro area 

muni.msa.2009.unids <- muni.msa.2009 %>%
  select(GEOID) %>%
  unique()

af.muni.metro.m <- stata.merge(af.muni,
                               muni.msa.2009.unids, 
                               "GEOID")

## check merge ##
table(af.muni.metro.m$merge.variable, useNA = "ifany")

af.muni.metro <- af.muni.metro.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## do this again with 2022 file just as a check ##

muni.msa.2022.unids <- muni.msa.2022 %>%
  select(GEOID) %>%
  unique()

af.muni.metro.m2.check <- stata.merge(af.muni,
                                      muni.msa.2022.unids,
                                      "GEOID")

## check merge ##
table(af.muni.metro.m2.check$merge.variable, useNA = "ifany")

af.muni.metro.m2.check <- af.muni.metro.m2.check %>%
  filter(merge.variable == 3) %>%
  select(GEOID)

af.muni.fin.check <- stata.merge(af.muni.metro,
                                 af.muni.metro.m2.check,
                                 "GEOID")

## check merge ##
table(af.muni.fin.check$merge.variable, useNA = "ifany")

zri.t1 <- af.muni.metro %>%
  select(GEOID_full,
         zri_st_2006)

zri.t2 <- af.muni.metro %>%
  select(GEOID_full,
         zri_st_2022)

save(af.muni.metro,
     file = "100_af_muni.Rda")

write.csv(zri.t1, 
          file = "100_zri_t1.csv")

write.csv(zri.t2, 
          file = "100_zri_t2.csv")

#####################
## create MSA file ##
#####################

## create muni to CBSA mapping ##

msa.names <- msa.del.2020.rd %>%
  select(`CBSA Code`, 
         `CBSA Title`) %>%
  rename(CBSA = `CBSA Code`,
         cbsaname = `CBSA Title`) %>%
  distinct()

msa.panel.wname.m <- stata.merge(msas.keep,
                                 msa.names,
                                 "CBSA")

table(msa.panel.wname.m$merge.variable)

msa.panel.wname <- msa.panel.wname.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## ACS vars ##
msa.panel.m0 <- stata.merge(msa.panel.wname,
                            msa.0922.fin,
                            "CBSA")

## check merge ## 
table(msa.panel.m0$merge.variable)

## keep matches ## 
msa.panel.k0 <- msa.panel.m0 %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)

## remaining outcomes outcomes ##
  
## above average ##
msa.panel.m1 <- stata.merge(msa.panel.k0,
                            npov.msa.fin,
                            "CBSA")

## check merge ## 
table(msa.panel.m1$merge.variable)

msa.panel.k1 <- msa.panel.m1 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable)

## high poverty ##

msa.panel.m2 <- stata.merge(msa.panel.k1,
                            hpov.msa.fin,
                            "CBSA")

## check merge ## 
table(msa.panel.m2$merge.variable)

msa.panel.k2 <- msa.panel.m2 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable)

## high poverty central city ##

msa.panel.m3 <- stata.merge(msa.panel.k2,
                            hpov.cc.msa.fin,
                            "CBSA")
 
## check merge ## 
table(msa.panel.m3$merge.variable)

msa.panel.k3 <- msa.panel.m3 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable)

## munis per msa ##

msa.panel.m4 <- stata.merge(msa.panel.k3,
                            munis.per,
                            "CBSA")

## check merge ## 
table(msa.panel.m4$merge.variable)

## keep 1, 3 ##
af.msa <- msa.panel.m4 %>%
  filter(merge.variable %in% c(1,3)) %>%
  select(-merge.variable)

sapply(af.msa, function(x) sum(is.na(x)))

save(af.msa,
     file = "100_af_msa.Rda")

## END OF PROGRAM ##

#sink()